From e39609c4502b7abab4133f77587084047a6c3e79 Mon Sep 17 00:00:00 2001 From: Wei Huang Date: Mon, 9 May 2011 11:37:03 +0100 Subject: [PATCH] x86/fpu: extract extended related code into xstate.h and xstate.c Currently the extended state code is mixed in with the FPU code in i387.c. As part of the FPU code cleanup, this patch moves all extended state code into independent files. The semantics are largely unchanged and most function names are kept untouched, except for xsave() and xsaveopt(). These two functions are combined into a single function. Signed-off-by: Wei Huang --- xen/arch/x86/Makefile | 1 + xen/arch/x86/cpu/common.c | 4 +- xen/arch/x86/domain.c | 7 +- xen/arch/x86/domctl.c | 2 +- xen/arch/x86/hvm/hvm.c | 3 +- xen/arch/x86/hvm/vmx/vmcs.c | 1 + xen/arch/x86/i387.c | 147 +--------------------------- xen/arch/x86/traps.c | 1 + xen/arch/x86/xstate.c | 183 +++++++++++++++++++++++++++++++++++ xen/include/asm-x86/i387.h | 68 +------------ xen/include/asm-x86/xstate.h | 68 +++++++++++++ 11 files changed, 267 insertions(+), 218 deletions(-) create mode 100644 xen/arch/x86/xstate.c create mode 100644 xen/include/asm-x86/xstate.h diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile index 2d6b14515c..8691cd3248 100644 --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -56,6 +56,7 @@ obj-y += machine_kexec.o obj-y += crash.o obj-y += tboot.o obj-y += hpet.o +obj-y += xstate.o obj-$(crash_debug) += gdbstub.o diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c index b94f6b7542..5e60a8e1e9 100644 --- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -354,7 +354,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) clear_bit(X86_FEATURE_XSAVE, boot_cpu_data.x86_capability); if ( cpu_has_xsave ) - xsave_init(); + xstate_init(); /* * The vendor-specific functions might have changed features. 
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index d88bcec410..56031c3db4 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -419,7 +420,7 @@ int vcpu_initialise(struct vcpu *v) v->arch.perdomain_ptes = perdomain_ptes(d, v); - if ( (rc = xsave_alloc_save_area(v)) != 0 ) + if ( (rc = xstate_alloc_save_area(v)) != 0 ) return rc; if ( v->arch.xsave_area ) v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse; @@ -485,7 +486,7 @@ int vcpu_initialise(struct vcpu *v) if ( rc ) { if ( v->arch.xsave_area ) - xsave_free_save_area(v); + xstate_free_save_area(v); else xfree(v->arch.fpu_ctxt); if ( !is_hvm_domain(d) && standalone_trap_ctxt(v) ) @@ -501,7 +502,7 @@ void vcpu_destroy(struct vcpu *v) release_compat_l4(v); if ( v->arch.xsave_area ) - xsave_free_save_area(v); + xstate_free_save_area(v); else xfree(v->arch.fpu_ctxt); diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c index 975aa4596a..d13599aed3 100644 --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #ifdef XEN_KDB_CONFIG #include "../kdb/include/kdbdefs.h" diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index 6d43f52137..7cbdce3bed 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -2427,7 +2428,7 @@ void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx, if ( count == 0 && v->arch.xcr0 ) { /* reset EBX to default value first */ - *ebx = XSAVE_AREA_MIN_SIZE; + *ebx = XSTATE_AREA_MIN_SIZE; for ( sub_leaf = 2; sub_leaf < 64; sub_leaf++ ) { if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) ) diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c index d139c1380d..b1d0efc241 100644 --- a/xen/arch/x86/hvm/vmx/vmcs.c +++ b/xen/arch/x86/hvm/vmx/vmcs.c @@ -26,6 +26,7 @@ #include #include #include 
+#include #include #include #include diff --git a/xen/arch/x86/i387.c b/xen/arch/x86/i387.c index 19701da6e9..c04eadc721 100644 --- a/xen/arch/x86/i387.c +++ b/xen/arch/x86/i387.c @@ -14,42 +14,9 @@ #include #include #include +#include #include -static bool_t __read_mostly cpu_has_xsaveopt; - -static void xsave(struct vcpu *v) -{ - struct xsave_struct *ptr = v->arch.xsave_area; - - asm volatile ( - ".byte " REX_PREFIX "0x0f,0xae,0x27" - : - : "a" (-1), "d" (-1), "D"(ptr) - : "memory" ); -} - -static void xsaveopt(struct vcpu *v) -{ - struct xsave_struct *ptr = v->arch.xsave_area; - - asm volatile ( - ".byte " REX_PREFIX "0x0f,0xae,0x37" - : - : "a" (-1), "d" (-1), "D"(ptr) - : "memory" ); -} - -static void xrstor(struct vcpu *v) -{ - struct xsave_struct *ptr = v->arch.xsave_area; - - asm volatile ( - ".byte " REX_PREFIX "0x0f,0xae,0x2f" - : - : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) ); -} - static void load_mxcsr(unsigned long val) { val &= 0xffbf; @@ -122,10 +89,7 @@ void save_init_fpu(struct vcpu *v) * we set all accumulated feature mask before doing save/restore. */ set_xcr0(v->arch.xcr0_accum); - if ( cpu_has_xsaveopt ) - xsaveopt(v); - else - xsave(v); + xsave(v); set_xcr0(v->arch.xcr0); } else if ( cpu_has_fxsr ) @@ -220,113 +184,6 @@ static void restore_fpu(struct vcpu *v) } } -#define XSTATE_CPUID 0xd - -/* - * Maximum size (in byte) of the XSAVE/XRSTOR save area required by all - * the supported and enabled features on the processor, including the - * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known. - */ -u32 xsave_cntxt_size; - -/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. 
*/ -u64 xfeature_mask; - -/* Cached xcr0 for fast read */ -DEFINE_PER_CPU(uint64_t, xcr0); - -void xsave_init(void) -{ - u32 eax, ebx, ecx, edx; - int cpu = smp_processor_id(); - u32 min_size; - - if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) - return; - - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - - BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE); - BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE)); - - /* FP/SSE, XSAVE.HEADER, YMM */ - min_size = XSAVE_AREA_MIN_SIZE; - if ( eax & XSTATE_YMM ) - min_size += XSTATE_YMM_SIZE; - BUG_ON(ecx < min_size); - - /* - * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size. - */ - set_in_cr4(X86_CR4_OSXSAVE); - set_xcr0((((u64)edx << 32) | eax) & XCNTXT_MASK); - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - - if ( cpu == 0 ) - { - /* - * xsave_cntxt_size is the max size required by enabled features. - * We know FP/SSE and YMM about eax, and nothing about edx at present. - */ - xsave_cntxt_size = ebx; - xfeature_mask = eax + ((u64)edx << 32); - xfeature_mask &= XCNTXT_MASK; - printk("%s: using cntxt_size: 0x%x and states: 0x%"PRIx64"\n", - __func__, xsave_cntxt_size, xfeature_mask); - - /* Check XSAVEOPT feature. */ - cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); - cpu_has_xsaveopt = !!(eax & XSAVEOPT); - } - else - { - BUG_ON(xsave_cntxt_size != ebx); - BUG_ON(xfeature_mask != (xfeature_mask & XCNTXT_MASK)); - } -} - -int xsave_alloc_save_area(struct vcpu *v) -{ - void *save_area; - - if ( !cpu_has_xsave || is_idle_vcpu(v) ) - return 0; - - BUG_ON(xsave_cntxt_size < XSAVE_AREA_MIN_SIZE); - - /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. 
*/ - save_area = _xmalloc(xsave_cntxt_size, 64); - if ( save_area == NULL ) - return -ENOMEM; - - memset(save_area, 0, xsave_cntxt_size); - ((u32 *)save_area)[6] = 0x1f80; /* MXCSR */ - *(uint64_t *)(save_area + 512) = XSTATE_FP_SSE; /* XSETBV */ - - v->arch.xsave_area = save_area; - v->arch.xcr0 = XSTATE_FP_SSE; - v->arch.xcr0_accum = XSTATE_FP_SSE; - - return 0; -} - -void xsave_free_save_area(struct vcpu *v) -{ - xfree(v->arch.xsave_area); - v->arch.xsave_area = NULL; -} - -bool_t xsave_enabled(const struct vcpu *v) -{ - if ( cpu_has_xsave ) - { - ASSERT(xsave_cntxt_size >= XSAVE_AREA_MIN_SIZE); - ASSERT(v->arch.xsave_area); - } - - return cpu_has_xsave; -} - /* * Local variables: * mode: C diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 129dc999ec..0e2e14773e 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c new file mode 100644 index 0000000000..e358fd7bbd --- /dev/null +++ b/xen/arch/x86/xstate.c @@ -0,0 +1,183 @@ +/* + * arch/x86/xstate.c + * + * x86 extended state operations + * + */ + +#include +#include +#include +#include +#include +#include +#include + +bool_t __read_mostly cpu_has_xsaveopt; + +/* + * Maximum size (in byte) of the XSAVE/XRSTOR save area required by all + * the supported and enabled features on the processor, including the + * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known. + */ +u32 xsave_cntxt_size; + +/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. */ +u64 xfeature_mask; + +/* Cached xcr0 for fast read */ +DEFINE_PER_CPU(uint64_t, xcr0); + +/* Because XCR0 is cached for each CPU, xsetbv() is not exposed. Users should + * use set_xcr0() instead. 
+ */ +static inline void xsetbv(u32 index, u64 xfeatures) +{ + u32 hi = xfeatures >> 32; + u32 lo = (u32)xfeatures; + + asm volatile (".byte 0x0f,0x01,0xd1" :: "c" (index), + "a" (lo), "d" (hi)); +} + +inline void set_xcr0(u64 xfeatures) +{ + this_cpu(xcr0) = xfeatures; + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures); +} + +inline uint64_t get_xcr0(void) +{ + return this_cpu(xcr0); +} + +void xsave(struct vcpu *v) +{ + struct xsave_struct *ptr = v->arch.xsave_area; + + if ( cpu_has_xsaveopt ) + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x37" + : + : "a" (-1), "d" (-1), "D"(ptr) + : "memory" ); + else + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x27" + : + : "a" (-1), "d" (-1), "D"(ptr) + : "memory" ); +} + +void xrstor(struct vcpu *v) +{ + struct xsave_struct *ptr = v->arch.xsave_area; + + asm volatile ( + ".byte " REX_PREFIX "0x0f,0xae,0x2f" + : + : "m" (*ptr), "a" (-1), "d" (-1), "D"(ptr) ); +} + +bool_t xsave_enabled(const struct vcpu *v) +{ + if ( cpu_has_xsave ) + { + ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE); + ASSERT(v->arch.xsave_area); + } + + return cpu_has_xsave; +} + +int xstate_alloc_save_area(struct vcpu *v) +{ + void *save_area; + + if ( !cpu_has_xsave || is_idle_vcpu(v) ) + return 0; + + BUG_ON(xsave_cntxt_size < XSTATE_AREA_MIN_SIZE); + + /* XSAVE/XRSTOR requires the save area be 64-byte-boundary aligned. 
*/ + save_area = _xmalloc(xsave_cntxt_size, 64); + if ( save_area == NULL ) + return -ENOMEM; + + memset(save_area, 0, xsave_cntxt_size); + ((u32 *)save_area)[6] = 0x1f80; /* MXCSR */ + *(uint64_t *)(save_area + 512) = XSTATE_FP_SSE; /* XSETBV */ + + v->arch.xsave_area = save_area; + v->arch.xcr0 = XSTATE_FP_SSE; + v->arch.xcr0_accum = XSTATE_FP_SSE; + + return 0; +} + +void xstate_free_save_area(struct vcpu *v) +{ + xfree(v->arch.xsave_area); + v->arch.xsave_area = NULL; +} + +/* Collect the information of processor's extended state */ +void xstate_init(void) +{ + u32 eax, ebx, ecx, edx; + int cpu = smp_processor_id(); + u32 min_size; + + if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) + return; + + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + + BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE); + BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE)); + + /* FP/SSE, XSAVE.HEADER, YMM */ + min_size = XSTATE_AREA_MIN_SIZE; + if ( eax & XSTATE_YMM ) + min_size += XSTATE_YMM_SIZE; + BUG_ON(ecx < min_size); + + /* + * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size. + */ + set_in_cr4(X86_CR4_OSXSAVE); + set_xcr0((((u64)edx << 32) | eax) & XCNTXT_MASK); + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + + if ( cpu == 0 ) + { + /* + * xsave_cntxt_size is the max size required by enabled features. + * We know FP/SSE and YMM about eax, and nothing about edx at present. + */ + xsave_cntxt_size = ebx; + xfeature_mask = eax + ((u64)edx << 32); + xfeature_mask &= XCNTXT_MASK; + printk("%s: using cntxt_size: 0x%x and states: 0x%"PRIx64"\n", + __func__, xsave_cntxt_size, xfeature_mask); + + /* Check XSAVEOPT feature. 
*/ + cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); + cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT); + } + else + { + BUG_ON(xsave_cntxt_size != ebx); + BUG_ON(xfeature_mask != (xfeature_mask & XCNTXT_MASK)); + } +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/asm-x86/i387.h b/xen/include/asm-x86/i387.h index 8e10d927e1..f94cdad6f6 100644 --- a/xen/include/asm-x86/i387.h +++ b/xen/include/asm-x86/i387.h @@ -14,71 +14,7 @@ #include #include -struct vcpu; - -extern unsigned int xsave_cntxt_size; -extern u64 xfeature_mask; - -void xsave_init(void); -int xsave_alloc_save_area(struct vcpu *v); -void xsave_free_save_area(struct vcpu *v); -bool_t xsave_enabled(const struct vcpu *v); - -#define XSAVE_AREA_MIN_SIZE (512 + 64) /* FP/SSE + XSAVE.HEADER */ -#define XSTATE_FP (1ULL << 0) -#define XSTATE_SSE (1ULL << 1) -#define XSTATE_YMM (1ULL << 2) -#define XSTATE_LWP (1ULL << 62) /* AMD lightweight profiling */ -#define XSTATE_FP_SSE (XSTATE_FP | XSTATE_SSE) -#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_LWP) -#define XSTATE_YMM_OFFSET XSAVE_AREA_MIN_SIZE -#define XSTATE_YMM_SIZE 256 -#define XSAVEOPT (1 << 0) - -struct xsave_struct -{ - struct { char x[512]; } fpu_sse; /* FPU/MMX, SSE */ - - struct { - u64 xstate_bv; - u64 reserved[7]; - } xsave_hdr; /* The 64-byte header */ - - struct { char x[XSTATE_YMM_SIZE]; } ymm; /* YMM */ - char data[]; /* Future new states */ -} __attribute__ ((packed, aligned (64))); - -#define XCR_XFEATURE_ENABLED_MASK 0 - -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif - -DECLARE_PER_CPU(uint64_t, xcr0); - -static inline void xsetbv(u32 index, u64 xfeatures) -{ - u32 hi = xfeatures >> 32; - u32 lo = (u32)xfeatures; - - asm volatile (".byte 0x0f,0x01,0xd1" :: "c" (index), - "a" (lo), "d" (hi)); -} - -static inline void set_xcr0(u64 xfeatures) -{ - 
this_cpu(xcr0) = xfeatures; - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures); -} - -static inline uint64_t get_xcr0(void) -{ - return this_cpu(xcr0); -} - -extern void setup_fpu(struct vcpu *v); -extern void save_init_fpu(struct vcpu *v); +void setup_fpu(struct vcpu *v); +void save_init_fpu(struct vcpu *v); #endif /* __ASM_I386_I387_H */ diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h new file mode 100644 index 0000000000..a0677173fd --- /dev/null +++ b/xen/include/asm-x86/xstate.h @@ -0,0 +1,68 @@ +/* + * include/asm-i386/xstate.h + * + * x86 extended state (xsave/xrstor) related definitions + * + */ + +#ifndef __ASM_XSTATE_H +#define __ASM_XSTATE_H + +#include +#include + +#define XSTATE_CPUID 0x0000000d +#define XSTATE_FEATURE_XSAVEOPT (1 << 0) /* sub-leaf 1, eax[bit 0] */ + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 /* index of XCR0 */ + +#define XSTATE_YMM_SIZE 256 +#define XSTATE_YMM_OFFSET XSAVE_AREA_MIN_SIZE +#define XSTATE_AREA_MIN_SIZE (512 + 64) /* FP/SSE + XSAVE.HEADER */ + +#define XSTATE_FP (1ULL << 0) +#define XSTATE_SSE (1ULL << 1) +#define XSTATE_YMM (1ULL << 2) +#define XSTATE_LWP (1ULL << 62) /* AMD lightweight profiling */ +#define XSTATE_FP_SSE (XSTATE_FP | XSTATE_SSE) +#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM | XSTATE_LWP) + +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +/* extended state variables */ +DECLARE_PER_CPU(uint64_t, xcr0); + +extern unsigned int xsave_cntxt_size; +extern u64 xfeature_mask; + +/* extended state save area */ +struct xsave_struct +{ + struct { char x[512]; } fpu_sse; /* FPU/MMX, SSE */ + + struct { + u64 xstate_bv; + u64 reserved[7]; + } xsave_hdr; /* The 64-byte header */ + + struct { char x[XSTATE_YMM_SIZE]; } ymm; /* YMM */ + char data[]; /* Future new states */ +} __attribute__ ((packed, aligned (64))); + +/* extended state operations */ +void set_xcr0(u64 xfeatures); +uint64_t get_xcr0(void); +void xsave(struct vcpu *v); +void 
xrstor(struct vcpu *v); +bool_t xsave_enabled(const struct vcpu *v); + +/* extended state init and cleanup functions */ +void xstate_free_save_area(struct vcpu *v); +int xstate_alloc_save_area(struct vcpu *v); +void xstate_init(void); + +#endif /* __ASM_XSTATE_H */ -- 2.30.2